import requests
from lxml import etree
import re

# Article page whose paragraphs are scanned for <span> children.
_ARTICLE_URL = 'http://10.89.23.253:48090/archives/xiao-yuan-zi-shu-xue-de-liao-a'

# SECURITY NOTE(review): the Cookie value below embeds what look like live
# session/CSRF tokens. They are kept verbatim so the request is unchanged, but
# they should be loaded from outside the source tree (environment variable or
# an untracked config file) and rotated if this file has been shared.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0',
    'Cookie': 'csrftoken=9qZ5U0IoCqEQV7mQsEdQS9H4vxzJV7SF;sessionid=axriqxnwys2u6xnde7nb3330yv3100xl;Trim-MC-token=ea7b4f8dd89b4f22b47d58a6fd49c87a; lastLoginUsername=zyd;fnos-token=i94hDBfhBWgjdvAkmaydVNR6f1JyabyCubTo62A4YPI=;XSRF-TOKEN=4fabb28f-97d1-42dc-bb24-ffe735e63643',
}

# Seconds to wait for the server before giving up. Without an explicit timeout
# requests will wait indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10


def _fetch_html(url, headers, timeout=_REQUEST_TIMEOUT):
    """Download *url* and return the response body decoded as text.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers to send with the GET request.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The page's HTML source as a string.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the article.
    response.raise_for_status()
    return response.text


def _iter_first_span_markup(page_text):
    """Yield the serialized first ``<span>`` child of each article paragraph.

    Paragraphs are selected with ``//main/section/article/p``. The expression
    matches by element path only; it does not filter on the article's class
    attribute. Paragraphs without a direct ``<span>`` child are skipped.

    Args:
        page_text: HTML source of the page.

    Yields:
        The markup of the first direct ``<span>`` child of each matching
        paragraph, as a string.
    """
    tree = etree.HTML(page_text)
    if tree is None:
        # Nothing to query if the parser produced no root element.
        return
    for paragraph in tree.xpath('//main/section/article/p'):
        spans = paragraph.xpath('./span')
        if spans:
            yield etree.tostring(spans[0], encoding='utf-8').decode('utf-8')
    # NOTE: an earlier, disabled draft prefixed 'http://10.89.23.253:48090/'
    # to the serialized element and took the text after the last '/' as a
    # filename. It was never executed and is intentionally not reproduced.


def main():
    """Fetch the article page and print the first span of each paragraph."""
    page_text = _fetch_html(_ARTICLE_URL, _REQUEST_HEADERS)
    for markup in _iter_first_span_markup(page_text):
        print(markup)


if __name__ == "__main__":
    main()
